In [1]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from tqdm import tqdm_notebook as tqdm
In [2]:
ctx = mx.cpu()
In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
In [4]:
def transform(data, label):
    # Scale pixel values to [0, 1] and cast labels to float32
    return data.astype(np.float32) / 255, label.astype(np.float32)
In [5]:
train_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size=batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size=batch_size,
                                  shuffle=False)
In [6]:
num_hidden = 256
In [7]:
net = gluon.nn.Sequential()
with net.name_scope():
    ###########################
    # Adding first hidden layer
    ###########################
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    ###########################
    # Adding dropout with rate .5 to the first hidden layer
    ###########################
    net.add(gluon.nn.Dropout(rate=0.5))
    ###########################
    # Adding second hidden layer
    ###########################
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    ###########################
    # Adding dropout with rate .5 to the second hidden layer
    ###########################
    net.add(gluon.nn.Dropout(rate=0.5))
    ###########################
    # Adding the output layer
    ###########################
    net.add(gluon.nn.Dense(units=num_outputs))
In [8]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
In [9]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
In [10]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
In [11]:
def evaluate_accuracy(data_iterator, net, mode='train'):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx).reshape([-1, 784])
        label = label.as_in_context(ctx)
        if mode == 'train':
            # Keep dropout active, so accuracy reflects the stochastic training-time network
            with mx.autograd.train_mode():
                output = net(data)
        else:
            # Disable dropout for deterministic test-time predictions
            with mx.autograd.predict_mode():
                output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
In [12]:
epochs = 10
smoothing_constant = .01
In [13]:
for e in tqdm(range(epochs)):
    for i, (data, label) in tqdm(enumerate(train_data)):
        data = data.as_in_context(ctx).reshape([-1, 784])
        label = label.as_in_context(ctx)
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        # Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net, mode='test')
    train_accuracy = evaluate_accuracy(train_data, net, mode='train')
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))
In [14]:
for i, (data, label) in enumerate(test_data):
    # Run a single test image through the network with dropout disabled
    data = data[0].as_in_context(ctx).reshape([-1, 784])
    label = label[0].as_in_context(ctx)
    with mx.autograd.record(train_mode=False):
        output = net(data)
    predictions = mx.nd.argmax(output, axis=1)
    print(predictions)
    break
In [15]:
test_accuracy = evaluate_accuracy(test_data, net, mode='test')
In [16]:
test_accuracy
Out[16]: